# TODO: site has anti-scraping protection; data not yet scraped

import requests

from bs4 import BeautifulSoup
import mysql.connector

def pa(p):
    """Fetch one page of campus job-fair listings from the SCUT career site
    and print each listing.

    Args:
        p: Page number to fetch. Accepts an int or a str; it is coerced to
           str before being appended to the URL (the original required a
           str and raised TypeError on int input).

    NOTE(review): the site appears to employ anti-scraping measures (see
    module TODO), so the response may not contain usable listing data.
    """
    page = str(p)
    # Show which page is being fetched.
    print(page)

    # School name, reserved for the (currently disabled) DB insert below.
    schoolName = "华南理工大学"

    # Listing URL; `pageNO` carries the requested page. The lowercase
    # `pageNo=1` parameter is kept exactly as the original request sent it.
    url = (
        "http://jyzx.6ihnep7.cas.scut.edu.cn/xs/zpxx/xyxj"
        "?pageNo=1&daoxv1=-1&entName=&time=-1&pageNO=" + page
    )

    # Request headers: session cookie and Host header the site expects.
    headers = {
        "Cookie": "__status__=;JSESSIONID=6F18181337448D131BFF5B3FF74003F2",
        "Accept-Encoding": "gzip,deflate",
        "Host": "jyzx.6ihnep7.cas.scut.edu.cn"
    }

    # Fetch and parse; the timeout prevents the script from hanging
    # forever on an unresponsive server (the original had none).
    html = requests.get(url=url, headers=headers, timeout=30)
    html.encoding = "utf-8"

    soup = BeautifulSoup(html.text, 'lxml')
    print(soup)

    # Each listing is rendered as an <li> tag; print them for inspection.
    dataList = soup.find_all("li")

    for data in dataList:
        # Parsing of the date/title spans and the DB insert stay disabled
        # until the anti-scraping issue is resolved (see module TODO).
        #createTime = data.find("span").text
        #jobTitle = data.find("a").text
        #print(createTime+"|"+jobTitle)
        print(data)
        # mysql_conn(jobTitle,schoolName,createTime)

#数据库连接
def mysql_conn(job_title, school_name, create_time):
    """Insert one job record into the `h_job` table.

    Args:
        job_title: Title of the job/fair listing.
        school_name: Name of the school the listing belongs to.
        create_time: Creation time of the listing.

    The connection and cursor are always closed, even if the insert
    fails (the original leaked both on every call).
    """
    mydb = mysql.connector.connect(
        host="127.0.0.1",
        user="root",
        passwd="",
        database="hschool_job"
    )
    try:
        mycursor = mydb.cursor()
        try:
            # Parameterized query: the driver escapes the values,
            # avoiding SQL injection from scraped text.
            sql = "INSERT INTO h_job (job_title,school_name,create_time) VALUES (%s,%s,%s)"
            val = (job_title, school_name, create_time)
            mycursor.execute(sql, val)
            mydb.commit()
        finally:
            mycursor.close()
    finally:
        mydb.close()

if __name__ == '__main__':
    # Scrape the first page when run as a script.
    pa("1")
